library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
devtools::install_github("ericonsi/EHData")
## Skipping install of 'EHData' from a github remote, the SHA1 (ccd698bd) has not changed since last install.
## Use `force = TRUE` to force installation
library(EHData)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.0.5
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.0.5
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
# Load the modified crime training data and print per-column summary
# statistics.
# NOTE(review): the path is an absolute, machine-specific Windows location, so
# the script only runs where that exact file exists.
df <- utils::read.csv(
  file = "D:\\RStudio\\CUNY_621\\Assignment 3\\crime-training-data_modified.csv"
)
summary(object = df)
## zn indus chas nox
## Min. : 0.00 Min. : 0.460 Min. :0.00000 Min. :0.3890
## 1st Qu.: 0.00 1st Qu.: 5.145 1st Qu.:0.00000 1st Qu.:0.4480
## Median : 0.00 Median : 9.690 Median :0.00000 Median :0.5380
## Mean : 11.58 Mean :11.105 Mean :0.07082 Mean :0.5543
## 3rd Qu.: 16.25 3rd Qu.:18.100 3rd Qu.:0.00000 3rd Qu.:0.6240
## Max. :100.00 Max. :27.740 Max. :1.00000 Max. :0.8710
## rm age dis rad
## Min. :3.863 Min. : 2.90 Min. : 1.130 Min. : 1.00
## 1st Qu.:5.887 1st Qu.: 43.88 1st Qu.: 2.101 1st Qu.: 4.00
## Median :6.210 Median : 77.15 Median : 3.191 Median : 5.00
## Mean :6.291 Mean : 68.37 Mean : 3.796 Mean : 9.53
## 3rd Qu.:6.630 3rd Qu.: 94.10 3rd Qu.: 5.215 3rd Qu.:24.00
## Max. :8.780 Max. :100.00 Max. :12.127 Max. :24.00
## tax ptratio lstat medv
## Min. :187.0 Min. :12.6 Min. : 1.730 Min. : 5.00
## 1st Qu.:281.0 1st Qu.:16.9 1st Qu.: 7.043 1st Qu.:17.02
## Median :334.5 Median :18.9 Median :11.350 Median :21.20
## Mean :409.5 Mean :18.4 Mean :12.631 Mean :22.59
## 3rd Qu.:666.0 3rd Qu.:20.2 3rd Qu.:16.930 3rd Qu.:25.00
## Max. :711.0 Max. :22.0 Max. :37.970 Max. :50.00
## target
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.4914
## 3rd Qu.:1.0000
## Max. :1.0000
# Show the column types and leading values of the raw data frame.
utils::str(object = df)
## 'data.frame': 466 obs. of 13 variables:
## $ zn : num 0 0 0 30 0 0 0 0 0 80 ...
## $ indus : num 19.58 19.58 18.1 4.93 2.46 ...
## $ chas : int 0 1 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
## $ rm : num 7.93 5.4 6.49 6.39 7.16 ...
## $ age : num 96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
## $ dis : num 2.05 1.32 1.98 7.04 2.7 ...
## $ rad : int 5 5 24 6 3 5 24 24 5 1 ...
## $ tax : int 403 403 666 300 193 384 666 666 224 315 ...
## $ ptratio: num 14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
## $ lstat : num 3.7 26.82 18.85 5.19 4.82 ...
## $ medv : num 50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
## $ target : int 1 1 1 0 0 0 1 1 0 0 ...
library(psych)
## Warning: package 'psych' was built under R version 4.0.5
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Overview plots of the columns of df with "target" passed as the reference
# column (type="box" presumably selects boxplots — confirm in the EHData docs).
EHSummarize_StandardPlots(df, "target", type="box")
# EHData multicollinearity exploration; the recorded output shows it loading
# corrplot and printing the full pairwise correlation matrix of df.
EHExplore_Multicollinearity(df, run_all=TRUE)
## corrplot 0.92 loaded
## zn indus chas nox rm age
## zn 1.00000000 -0.53826643 -0.04016203 -0.51704518 0.31981410 -0.57258054
## indus -0.53826643 1.00000000 0.06118317 0.75963008 -0.39271181 0.63958182
## chas -0.04016203 0.06118317 1.00000000 0.09745577 0.09050979 0.07888366
## nox -0.51704518 0.75963008 0.09745577 1.00000000 -0.29548972 0.73512782
## rm 0.31981410 -0.39271181 0.09050979 -0.29548972 1.00000000 -0.23281251
## age -0.57258054 0.63958182 0.07888366 0.73512782 -0.23281251 1.00000000
## dis 0.66012434 -0.70361886 -0.09657711 -0.76888404 0.19901584 -0.75089759
## rad -0.31548119 0.60062839 -0.01590037 0.59582984 -0.20844570 0.46031430
## tax -0.31928408 0.73222922 -0.04676476 0.65387804 -0.29693430 0.51212452
## ptratio -0.39103573 0.39468980 -0.12866058 0.17626871 -0.36034706 0.25544785
## lstat -0.43299252 0.60711023 -0.05142322 0.59624264 -0.63202445 0.60562001
## medv 0.37671713 -0.49617432 0.16156528 -0.43012267 0.70533679 -0.37815605
## target -0.43168176 0.60485074 0.08004187 0.72610622 -0.15255334 0.63010625
## dis rad tax ptratio lstat medv
## zn 0.66012434 -0.31548119 -0.31928408 -0.3910357 -0.43299252 0.3767171
## indus -0.70361886 0.60062839 0.73222922 0.3946898 0.60711023 -0.4961743
## chas -0.09657711 -0.01590037 -0.04676476 -0.1286606 -0.05142322 0.1615653
## nox -0.76888404 0.59582984 0.65387804 0.1762687 0.59624264 -0.4301227
## rm 0.19901584 -0.20844570 -0.29693430 -0.3603471 -0.63202445 0.7053368
## age -0.75089759 0.46031430 0.51212452 0.2554479 0.60562001 -0.3781560
## dis 1.00000000 -0.49499193 -0.53425464 -0.2333394 -0.50752800 0.2566948
## rad -0.49499193 1.00000000 0.90646323 0.4714516 0.50310125 -0.3976683
## tax -0.53425464 0.90646323 1.00000000 0.4744223 0.56418864 -0.4900329
## ptratio -0.23333940 0.47145160 0.47442229 1.0000000 0.37735605 -0.5159153
## lstat -0.50752800 0.50310125 0.56418864 0.3773560 1.00000000 -0.7358008
## medv 0.25669476 -0.39766826 -0.49003287 -0.5159153 -0.73580078 1.0000000
## target -0.61867312 0.62810492 0.61111331 0.2508489 0.46912702 -0.2705507
## target
## zn -0.43168176
## indus 0.60485074
## chas 0.08004187
## nox 0.72610622
## rm -0.15255334
## age 0.63010625
## dis -0.61867312
## rad 0.62810492
## tax 0.61111331
## ptratio 0.25084892
## lstat 0.46912702
## medv -0.27055071
## target 1.00000000
# dfM: df plus sign-flipped copies of zn and rm (znM, rmM).
dfM <- dplyr::mutate(df, znM = -zn, rmM = -rm)
#df <- EHPrepare_ScaleAllButTarget(df, "target")
# df22: df plus a 0/1 indicator that is 1 when ptratio exceeds 13.
df22 <- dplyr::mutate(df, ptOver13 = as.numeric(ptratio > 13))
utils::str(object = df22)
## 'data.frame': 466 obs. of 14 variables:
## $ zn : num 0 0 0 30 0 0 0 0 0 80 ...
## $ indus : num 19.58 19.58 18.1 4.93 2.46 ...
## $ chas : int 0 1 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
## $ rm : num 7.93 5.4 6.49 6.39 7.16 ...
## $ age : num 96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
## $ dis : num 2.05 1.32 1.98 7.04 2.7 ...
## $ rad : int 5 5 24 6 3 5 24 24 5 1 ...
## $ tax : int 403 403 666 300 193 384 666 666 224 315 ...
## $ ptratio : num 14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
## $ lstat : num 3.7 26.82 18.85 5.19 4.82 ...
## $ medv : num 50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
## $ target : int 1 1 1 0 0 0 1 1 0 0 ...
## $ ptOver13: num 1 1 1 1 1 1 1 1 1 1 ...
# EHData interaction scatterplots for df22, called with "target" and
# "ptOver13"; the recorded output shows a list of 14 plots being printed, each
# fitted via geom_smooth().
EHExplore_Interactions_Scatterplots(df22, "target", "ptOver13")
## [[1]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula 'y ~ x'
# EHData boxplot helper for df22 with "ptOver13" as the categorical column;
# the recorded output shows a list of 15 plots being printed.
EHExplore_OneContinuousAndOneCategoricalColumn_Boxplots(df22, "ptOver13")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
# EHData bar-chart helper for df22 with "ptOver13" as the categorical column;
# the recorded output shows a list of 14 plots being printed.
EHExplore_TwoCategoricalColumns_Barcharts(df22, "ptOver13")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
# Cross-tabulate the response against the ptOver13 indicator.
dftt <- df22[c("target", "ptOver13")]
table(dftt)
## ptOver13
## target 0 1
## 0 3 234
## 1 12 217
# Per-group means of lstat, medv and target for each level of ptOver13.
# mean() is used instead of stats::ave(): ave() returns one value per input
# row, so summarize() produced one row per observation (466 rows) rather than
# one row per group.
# NOTE: the transcript output recorded below predates this fix.
df222 <- df22 %>%
  group_by(ptOver13) %>%
  summarize(
    ave_lstat = mean(lstat),
    ave_medv = mean(medv),
    ave_target = mean(target),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'ptOver13'. You can override using the `.groups` argument.
# Display the per-group summary table.
print(x = df222)
## # A tibble: 466 x 4
## # Groups: ptOver13 [2]
## ptOver13 ave_lstat ave_medv ave_target
## <dbl> <dbl> <dbl> <dbl>
## 1 0 7.80 37.0 0.8
## 2 0 7.80 37.0 0.8
## 3 0 7.80 37.0 0.8
## 4 0 7.80 37.0 0.8
## 5 0 7.80 37.0 0.8
## 6 0 7.80 37.0 0.8
## 7 0 7.80 37.0 0.8
## 8 0 7.80 37.0 0.8
## 9 0 7.80 37.0 0.8
## 10 0 7.80 37.0 0.8
## # ... with 456 more rows
#a <- EHModel_Regression_Logistic(df, "target")
# Baseline logistic regression: target against every other column of df.
logistic_model <- glm(
  formula = target ~ .,
  family = "binomial",
  data = df
)
utils::str(object = df)
## 'data.frame': 466 obs. of 13 variables:
## $ zn : num 0 0 0 30 0 0 0 0 0 80 ...
## $ indus : num 19.58 19.58 18.1 4.93 2.46 ...
## $ chas : int 0 1 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
## $ rm : num 7.93 5.4 6.49 6.39 7.16 ...
## $ age : num 96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
## $ dis : num 2.05 1.32 1.98 7.04 2.7 ...
## $ rad : int 5 5 24 6 3 5 24 24 5 1 ...
## $ tax : int 403 403 666 300 193 384 666 666 224 315 ...
## $ ptratio: num 14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
## $ lstat : num 3.7 26.82 18.85 5.19 4.82 ...
## $ medv : num 50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
## $ target : int 1 1 1 0 0 0 1 1 0 0 ...
# Print the call, coefficients and deviance summary of the baseline fit.
print(x = logistic_model)
##
## Call: glm(formula = target ~ ., family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus chas nox rm
## -40.822934 -0.065946 -0.064614 0.910765 49.122297 -0.587488
## age dis rad tax ptratio lstat
## 0.034189 0.738660 0.666366 -0.006171 0.402566 0.045869
## medv
## 0.180824
##
## Degrees of Freedom: 465 Total (i.e. Null); 453 Residual
## Null Deviance: 645.9
## Residual Deviance: 192 AIC: 218
#mmps(logistic_model)
#df22$ptratio = scale(df22$ptratio)
# Refit the logistic model on the observations with ptratio > 13 only.
df23 <- df22 %>%
  dplyr::filter(ptratio > 13)
# ptOver13 equals 1 on every row of df23, so it is collinear with the
# intercept; leaving it in `target ~ .` made the design rank-deficient and
# produced an NA coefficient. It is excluded in the formula so df23 itself
# keeps all of its columns.
# NOTE: the transcript output recorded below predates this fix.
m2 <- glm(
  target ~ . - ptOver13,
  data = df23,
  family = "binomial"
)
print(m2)
##
## Call: glm(formula = target ~ ., family = "binomial", data = df23)
##
## Coefficients:
## (Intercept) zn indus chas nox rm
## -37.333521 -0.129787 0.004314 0.749600 38.875661 -0.716138
## age dis rad tax ptratio lstat
## 0.039393 0.878725 0.758820 -0.005851 0.450693 0.034661
## medv ptOver13
## 0.178087 NA
##
## Degrees of Freedom: 450 Total (i.e. Null); 438 Residual
## Null Deviance: 624.6
## Residual Deviance: 186.3 AIC: 212.3
# Marginal model plots for m2. The recorded run emitted four
# smooth-construction errors ("fewer unique covariate combinations than
# specified maximum degrees of freedom") — presumably triggered by predictors
# with very few distinct values; TODO confirm which terms.
mmps(m2)
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
## A term has fewer unique covariate combinations than specified maximum degrees of freedom
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
## A term has fewer unique covariate combinations than specified maximum degrees of freedom
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
## A term has fewer unique covariate combinations than specified maximum degrees of freedom
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
## A term has fewer unique covariate combinations than specified maximum degrees of freedom
# Standardize every predictor (all columns except the response) and refit the
# EHData logistic-regression helper on the scaled data.
# - across(-target, ...) replaces the superseded mutate_at(c(1:12), ...) so
#   the selection no longer depends on target being the 13th column.
# - scale() returns a one-column matrix with attributes; as.numeric() stores a
#   plain numeric vector in each column instead.
df2 <- df %>%
  mutate(across(-target, function(col) as.numeric(scale(col))))
EHModel_Regression_Logistic(df2, "target")
## Warning: package 'caTools' was built under R version 4.0.5
## Warning: package 'ROCR' was built under R version 4.0.5
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus chas nox rm
## 2.3290 -1.5408 -0.4423 0.2339 5.7309 -0.4141
## age dis rad tax ptratio lstat
## 0.9683 1.5563 5.7880 -1.0362 0.8844 0.3258
## medv
## 1.6708
##
## Degrees of Freedom: 465 Total (i.e. Null); 453 Residual
## Null Deviance: 645.9
## Residual Deviance: 192 AIC: 218
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8464 -0.1445 -0.0017 0.0029 3.4665
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.3290 0.7195 3.237 0.00121 **
## zn -1.5408 0.8097 -1.903 0.05706 .
## indus -0.4423 0.3260 -1.357 0.17485
## chas 0.2339 0.1940 1.205 0.22803
## nox 5.7309 0.9254 6.193 5.90e-10 ***
## rm -0.4141 0.5095 -0.813 0.41637
## age 0.9683 0.3912 2.475 0.01333 *
## dis 1.5563 0.4852 3.208 0.00134 **
## rad 5.7880 1.4171 4.084 4.42e-05 ***
## tax -1.0362 0.4961 -2.089 0.03674 *
## ptratio 0.8844 0.2782 3.179 0.00148 **
## lstat 0.3258 0.3838 0.849 0.39608
## medv 1.6708 0.6310 2.648 0.00810 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 645.88 on 465 degrees of freedom
## Residual deviance: 192.05 on 453 degrees of freedom
## AIC: 218.05
##
## Number of Fisher Scoring iterations: 9
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 60 5
## 1 1 41
##
## Accuracy : 0.9439
## 95% CI : (0.8819, 0.9791)
## No Information Rate : 0.5701
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8844
##
## Mcnemar's Test P-Value : 0.2207
##
## Sensitivity : 0.9836
## Specificity : 0.8913
## Pos Pred Value : 0.9231
## Neg Pred Value : 0.9762
## Prevalence : 0.5701
## Detection Rate : 0.5607
## Detection Prevalence : 0.6075
## Balanced Accuracy : 0.9375
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.982537419814683"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 61 controls (dfPred_raw$class 0) < 46 cases (dfPred_raw$class 1).
## Area under the curve: 0.9825
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus chas nox rm
## 2.3290 -1.5408 -0.4423 0.2339 5.7309 -0.4141
## age dis rad tax ptratio lstat
## 0.9683 1.5563 5.7880 -1.0362 0.8844 0.3258
## medv
## 1.6708
##
## Degrees of Freedom: 465 Total (i.e. Null); 453 Residual
## Null Deviance: 645.9
## Residual Deviance: 192 AIC: 218
# Reduced model: keep only nox, dis, rad, ptratio, medv and the response, then
# fit the EHData logistic-regression helper on that subset.
df1 <- df[c("nox", "dis", "rad", "ptratio", "medv", "target")]
EHModel_Regression_Logistic(df1, "target")
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) nox dis rad ptratio medv
## -31.27121 37.37652 0.29535 0.51558 0.28586 0.08635
##
## Degrees of Freedom: 465 Total (i.e. Null); 460 Residual
## Null Deviance: 645.9
## Residual Deviance: 225.3 AIC: 237.3
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.06137 -0.31295 -0.04733 0.00705 2.81210
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -31.27121 4.82619 -6.479 9.20e-11 ***
## nox 37.37652 5.56582 6.715 1.88e-11 ***
## dis 0.29535 0.14902 1.982 0.04748 *
## rad 0.51558 0.11531 4.471 7.77e-06 ***
## ptratio 0.28586 0.09877 2.894 0.00380 **
## medv 0.08635 0.02832 3.050 0.00229 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 645.88 on 465 degrees of freedom
## Residual deviance: 225.32 on 460 degrees of freedom
## AIC: 237.32
##
## Number of Fisher Scoring iterations: 8
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 72 12
## 1 2 69
##
## Accuracy : 0.9097
## 95% CI : (0.8531, 0.9497)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.82
##
## Mcnemar's Test P-Value : 0.01616
##
## Sensitivity : 0.9730
## Specificity : 0.8519
## Pos Pred Value : 0.8571
## Neg Pred Value : 0.9718
## Prevalence : 0.4774
## Detection Rate : 0.4645
## Detection Prevalence : 0.5419
## Balanced Accuracy : 0.9124
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.97630964297631"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 74 controls (dfPred_raw$class 0) < 81 cases (dfPred_raw$class 1).
## Area under the curve: 0.9763
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) nox dis rad ptratio medv
## -31.27121 37.37652 0.29535 0.51558 0.28586 0.08635
##
## Degrees of Freedom: 465 Total (i.e. Null); 460 Residual
## Null Deviance: 645.9
## Residual Deviance: 225.3 AIC: 237.3
# dfT: df plus a 0/1 indicator that is 1 when tax is at least 600, followed by
# the EHData interaction scatterplots against that indicator.
dfT <- dplyr::mutate(df, TaxOver600 = as.numeric(tax >= 600))
EHExplore_Interactions_Scatterplots(dfT, "target", "TaxOver600")
## [[1]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula 'y ~ x'
# dfT2: df plus a 0/1 indicator that is 1 when indus is at least 16; the
# resulting interaction scatterplots are combined into one patchwork layout.
dfT2 <- dplyr::mutate(df, IndusOver16 = as.numeric(indus >= 16))
patchwork::wrap_plots(
  EHExplore_Interactions_Scatterplots(dfT2, "target", "IndusOver16")
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# df4: dfT extended with two more threshold indicators and two hand-built
# interaction columns (indicator multiplied by a continuous predictor).
df4 <- dplyr::mutate(
  dfT,
  lstatOver12 = as.numeric(lstat > 12),
  Inter_taxOver600_lstat = TaxOver600 * lstat,
  Inter_lstatOver12_medv = lstatOver12 * medv,
  IndusOver16 = as.numeric(indus >= 16)
)
#EHExplore_TwoCategoricalColumns_Barcharts(dfT, "TaxOver600")
patchwork::wrap_plots(
  EHExplore_Interactions_Scatterplots(df4, "target", "lstatOver12")
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# Fit the EHData logistic-regression helper on df4 and pass its return value
# to plot(); the recorded output shows the helper printing the glm fit, its
# summary, a confusion matrix and the ROC/AUC.
plot(EHModel_Regression_Logistic(df4, "target"))
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus
## -39.437128 -0.057955 -0.112958
## chas nox rm
## 1.373814 48.225024 -0.986275
## age dis rad
## 0.023800 0.678863 0.588571
## tax ptratio lstat
## -0.001163 0.396786 0.136516
## medv TaxOver600 lstatOver12
## 0.230639 -1.293461 3.463820
## Inter_taxOver600_lstat Inter_lstatOver12_medv IndusOver16
## -0.273658 -0.237180 1.431433
##
## Degrees of Freedom: 465 Total (i.e. Null); 448 Residual
## Null Deviance: 645.9
## Residual Deviance: 176.8 AIC: 212.8
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7834 -0.1444 -0.0034 0.0273 3.6843
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -39.437128 7.161435 -5.507 3.65e-08 ***
## zn -0.057955 0.033018 -1.755 0.079217 .
## indus -0.112958 0.098781 -1.144 0.252821
## chas 1.373814 0.845214 1.625 0.104076
## nox 48.225024 8.059928 5.983 2.19e-09 ***
## rm -0.986275 0.796630 -1.238 0.215694
## age 0.023800 0.014601 1.630 0.103098
## dis 0.678863 0.238467 2.847 0.004416 **
## rad 0.588571 0.156077 3.771 0.000163 ***
## tax -0.001163 0.004025 -0.289 0.772548
## ptratio 0.396786 0.136976 2.897 0.003770 **
## lstat 0.136516 0.081492 1.675 0.093891 .
## medv 0.230639 0.078249 2.947 0.003204 **
## TaxOver600 -1.293461 13.243964 -0.098 0.922199
## lstatOver12 3.463820 2.216992 1.562 0.118195
## Inter_taxOver600_lstat -0.273658 0.678037 -0.404 0.686504
## Inter_lstatOver12_medv -0.237180 0.103369 -2.294 0.021762 *
## IndusOver16 1.431433 1.488158 0.962 0.336109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 645.88 on 465 degrees of freedom
## Residual deviance: 176.79 on 448 degrees of freedom
## AIC: 212.79
##
## Number of Fisher Scoring iterations: 10
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 42 7
## 1 6 48
##
## Accuracy : 0.8738
## 95% CI : (0.7938, 0.9311)
## No Information Rate : 0.534
## P-Value [Acc > NIR] : 1.872e-13
##
## Kappa : 0.7467
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8750
## Specificity : 0.8727
## Pos Pred Value : 0.8571
## Neg Pred Value : 0.8889
## Prevalence : 0.4660
## Detection Rate : 0.4078
## Detection Prevalence : 0.4757
## Balanced Accuracy : 0.8739
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.964015151515151"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 48 controls (dfPred_raw$class 0) < 55 cases (dfPred_raw$class 1).
## Area under the curve: 0.964
# df5: df4 without the observation whose row name is 338.
df5 <- dplyr::filter(df4, rownames(df4) != "338")
# df6: df5 without IndusOver16 and with zn replaced by log(zn + 1).
df6 <- dplyr::mutate(
  dplyr::select(df5, -IndusOver16),
  zn = log(zn + 1)
)
plot(EHModel_Regression_Logistic(df6, "target"))
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus
## -4.669e+01 -6.879e-01 -4.388e-02
## chas nox rm
## 1.170e+00 5.695e+01 -1.279e+00
## age dis rad
## 3.427e-02 9.827e-01 6.641e-01
## tax ptratio lstat
## -7.533e-04 3.948e-01 1.623e-01
## medv TaxOver600 lstatOver12
## 2.970e-01 -1.398e+00 3.418e+00
## Inter_taxOver600_lstat Inter_lstatOver12_medv
## -2.948e-01 -2.526e-01
##
## Degrees of Freedom: 464 Total (i.e. Null); 448 Residual
## Null Deviance: 644.5
## Residual Deviance: 162 AIC: 196
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.68080 -0.11809 -0.00483 0.01816 2.58674
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.669e+01 8.034e+00 -5.811 6.21e-09 ***
## zn -6.879e-01 3.002e-01 -2.292 0.021927 *
## indus -4.388e-02 5.699e-02 -0.770 0.441294
## chas 1.170e+00 8.761e-01 1.336 0.181557
## nox 5.695e+01 9.078e+00 6.273 3.54e-10 ***
## rm -1.279e+00 8.428e-01 -1.518 0.129035
## age 3.427e-02 1.595e-02 2.149 0.031637 *
## dis 9.827e-01 2.818e-01 3.487 0.000488 ***
## rad 6.641e-01 1.731e-01 3.837 0.000125 ***
## tax -7.533e-04 4.271e-03 -0.176 0.859987
## ptratio 3.948e-01 1.494e-01 2.643 0.008218 **
## lstat 1.623e-01 8.504e-02 1.909 0.056248 .
## medv 2.970e-01 8.394e-02 3.538 0.000403 ***
## TaxOver600 -1.398e+00 1.638e+01 -0.085 0.931975
## lstatOver12 3.418e+00 2.294e+00 1.489 0.136359
## Inter_taxOver600_lstat -2.948e-01 8.650e-01 -0.341 0.733264
## Inter_lstatOver12_medv -2.526e-01 1.071e-01 -2.360 0.018293 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 644.45 on 464 degrees of freedom
## Residual deviance: 161.98 on 448 degrees of freedom
## AIC: 195.98
##
## Number of Fisher Scoring iterations: 10
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 55 1
## 1 3 52
##
## Accuracy : 0.964
## 95% CI : (0.9103, 0.9901)
## No Information Rate : 0.5225
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9279
##
## Mcnemar's Test P-Value : 0.6171
##
## Sensitivity : 0.9483
## Specificity : 0.9811
## Pos Pred Value : 0.9821
## Neg Pred Value : 0.9455
## Prevalence : 0.5225
## Detection Rate : 0.4955
## Detection Prevalence : 0.5045
## Balanced Accuracy : 0.9647
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.993819128171763"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 58 controls (dfPred_raw$class 0) < 53 cases (dfPred_raw$class 1).
## Area under the curve: 0.9938
# df11: a reduced predictor set from df5 plus an explicit indus x IndusOver16
# interaction column, fitted with the EHData logistic-regression helper.
# The recorded run warns that fitted probabilities of 0 or 1 occurred and
# shows very large standard errors for IndusOver16 and inter.
df11 <- dplyr::mutate(
  dplyr::select(
    df5,
    dplyr::all_of(c(
      "target", "zn", "nox", "age", "dis", "rad",
      "ptratio", "medv", "indus", "IndusOver16"
    ))
  ),
  inter = indus * IndusOver16
)
EHModel_Regression_Logistic(df11, "target")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn nox age dis rad
## -42.01114 -0.07453 44.50486 0.03490 0.77365 0.53383
## ptratio medv indus IndusOver16 inter
## 0.40204 0.13838 -0.05919 152.54358 -6.95711
##
## Degrees of Freedom: 464 Total (i.e. Null); 454 Residual
## Null Deviance: 644.5
## Residual Deviance: 174.5 AIC: 196.5
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0340 -0.1419 0.0000 0.0000 3.1217
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -42.01114 7.17696 -5.854 4.81e-09 ***
## zn -0.07453 0.03557 -2.095 0.036129 *
## nox 44.50486 7.85438 5.666 1.46e-08 ***
## age 0.03490 0.01153 3.027 0.002467 **
## dis 0.77365 0.22698 3.408 0.000653 ***
## rad 0.53383 0.13471 3.963 7.41e-05 ***
## ptratio 0.40204 0.13231 3.039 0.002377 **
## medv 0.13838 0.03959 3.495 0.000474 ***
## indus -0.05919 0.09302 -0.636 0.524582
## IndusOver16 152.54358 9215.00483 0.017 0.986793
## inter -6.95711 420.96877 -0.017 0.986814
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 644.45 on 464 degrees of freedom
## Residual deviance: 174.55 on 454 degrees of freedom
## AIC: 196.55
##
## Number of Fisher Scoring iterations: 20
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 60 7
## 1 3 57
##
## Accuracy : 0.9213
## 95% CI : (0.86, 0.9616)
## No Information Rate : 0.5039
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8426
##
## Mcnemar's Test P-Value : 0.3428
##
## Sensitivity : 0.9524
## Specificity : 0.8906
## Pos Pred Value : 0.8955
## Neg Pred Value : 0.9500
## Prevalence : 0.4961
## Detection Rate : 0.4724
## Detection Prevalence : 0.5276
## Balanced Accuracy : 0.9215
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.976686507936508"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 63 controls (dfPred_raw$class 0) < 64 cases (dfPred_raw$class 1).
## Area under the curve: 0.9767
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn nox age dis rad
## -42.01114 -0.07453 44.50486 0.03490 0.77365 0.53383
## ptratio medv indus IndusOver16 inter
## 0.40204 0.13838 -0.05919 152.54358 -6.95711
##
## Degrees of Freedom: 464 Total (i.e. Null); 454 Residual
## Null Deviance: 644.5
## Residual Deviance: 174.5 AIC: 196.5
# Building Interactions ----
# dfInt: df plus seven 0/1 threshold indicators, one per predictor of
# interest, then the EHData logistic-regression helper fitted on all columns.
dfInt <- dplyr::mutate(
  df,
  TaxOver600 = as.numeric(tax >= 600),
  ptOver13 = as.numeric(ptratio > 13),
  lstatOver12 = as.numeric(lstat > 12),
  IndusOver16 = as.numeric(indus >= 16),
  ZnOver0 = as.numeric(zn > 0),
  NoxOverPoint8 = as.numeric(nox >= 0.8),
  MedvBelow50 = as.numeric(medv < 50)
)
EHModel_Regression_Logistic(dfInt, "target")
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus chas nox
## -33.528688 -0.049437 -0.056815 0.964549 36.963196
## rm age dis rad tax
## -0.819963 0.038134 1.025748 0.648733 0.001652
## ptratio lstat medv TaxOver600 ptOver13
## 0.518286 0.171361 0.221716 -7.280268 -8.156292
## lstatOver12 IndusOver16 ZnOver0 NoxOverPoint8 MedvBelow50
## -1.372613 1.804373 -2.409817 7.538132 0.897657
##
## Degrees of Freedom: 465 Total (i.e. Null); 446 Residual
## Null Deviance: 645.9
## Residual Deviance: 172.8 AIC: 212.8
##
## Call:
## glm(formula = fla, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4794 -0.1493 -0.0023 0.0198 4.1463
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -33.528688 9.574531 -3.502 0.000462 ***
## zn -0.049437 0.068585 -0.721 0.471027
## indus -0.056815 0.103823 -0.547 0.584224
## chas 0.964549 0.846075 1.140 0.254275
## nox 36.963196 9.333723 3.960 7.49e-05 ***
## rm -0.819963 0.831087 -0.987 0.323831
## age 0.038134 0.014766 2.583 0.009805 **
## dis 1.025748 0.292165 3.511 0.000447 ***
## rad 0.648733 0.164036 3.955 7.66e-05 ***
## tax 0.001652 0.004013 0.412 0.680551
## ptratio 0.518286 0.152180 3.406 0.000660 ***
## lstat 0.171361 0.077858 2.201 0.027739 *
## medv 0.221716 0.085963 2.579 0.009903 **
## TaxOver600 -7.280268 4.386502 -1.660 0.096975 .
## ptOver13 -8.156292 4.564175 -1.787 0.073934 .
## lstatOver12 -1.372613 0.676083 -2.030 0.042332 *
## IndusOver16 1.804373 1.467205 1.230 0.218771
## ZnOver0 -2.409817 1.772536 -1.360 0.173978
## NoxOverPoint8 7.538132 953.240826 0.008 0.993690
## MedvBelow50 0.897657 2.107985 0.426 0.670227
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 645.88 on 465 degrees of freedom
## Residual deviance: 172.77 on 446 degrees of freedom
## AIC: 212.77
##
## Number of Fisher Scoring iterations: 16
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 47 6
## 1 2 38
##
## Accuracy : 0.914
## 95% CI : (0.8375, 0.9621)
## No Information Rate : 0.5269
## P-Value [Acc > NIR] : 6.31e-16
##
## Kappa : 0.8267
##
## Mcnemar's Test P-Value : 0.2888
##
## Sensitivity : 0.9592
## Specificity : 0.8636
## Pos Pred Value : 0.8868
## Neg Pred Value : 0.9500
## Prevalence : 0.5269
## Detection Rate : 0.5054
## Detection Prevalence : 0.5699
## Balanced Accuracy : 0.9114
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## [1] "AUC: 0.963358070500928"
##
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg, plot = TRUE)
##
## Data: dfPred_raw$predict_reg in 49 controls (dfPred_raw$class 0) < 44 cases (dfPred_raw$class 1).
## Area under the curve: 0.9634
##
## Call: glm(formula = fla, family = "binomial", data = df)
##
## Coefficients:
## (Intercept) zn indus chas nox
## -33.528688 -0.049437 -0.056815 0.964549 36.963196
## rm age dis rad tax
## -0.819963 0.038134 1.025748 0.648733 0.001652
## ptratio lstat medv TaxOver600 ptOver13
## 0.518286 0.171361 0.221716 -7.280268 -8.156292
## lstatOver12 IndusOver16 ZnOver0 NoxOverPoint8 MedvBelow50
## -1.372613 1.804373 -2.409817 7.538132 0.897657
##
## Degrees of Freedom: 465 Total (i.e. Null); 446 Residual
## Null Deviance: 645.9
## Residual Deviance: 172.8 AIC: 212.8
# Interaction scatterplots for every dummy variable listed below.
#
# For each name, EHExplore_Interactions_Scatterplots() is called with dfInt,
# "target" and that name; the list it returns is drawn with grid.arrange() in
# pages of at most nine plots (3 columns x 3 rows). A page with fewer plots
# uses only as many columns and rows as it needs, so a 20-plot list is drawn
# as 9 + 9 + 2 (the last page as 2 columns x 1 row), matching the previous
# hand-written calls.
#
# Page boundaries are derived from length(a) instead of the fixed ranges
# 1:9 / 10:18 / 19:20, so no out-of-range index is requested when fewer than
# 20 plots come back and no plot is silently skipped when more do.
dummy_vars <- c(
  "MedvBelow50", "NoxOverPoint8", "ZnOver0", "IndusOver16",
  "lstatOver12", "ptOver13", "TaxOver600"
)
plots_per_page <- 9L
max_cols <- 3L

for (dummy_var in dummy_vars) {
  # The result is still stored in `a`, so after the loop `a` holds the plots
  # for the last variable ("TaxOver600"), as it did before.
  a <- EHExplore_Interactions_Scatterplots(dfInt, "target", dummy_var)

  # Assign every plot index to a page number, then draw one page at a time.
  page_of_plot <- ceiling(seq_along(a) / plots_per_page)
  for (page_idx in split(seq_along(a), page_of_plot)) {
    n_cols <- min(max_cols, length(page_idx))
    n_rows <- ceiling(length(page_idx) / n_cols)
    grid.arrange(grobs = a[page_idx], ncol = n_cols, nrow = n_rows)
  }
}
#lstatOver12
#IndusOver16
#ptOver13
#ZnOver0
#NoxOverPoint8
#radOver15
#TaxOver600
#ptratioBelow20
#MedvBelow50